document.write('') document.write('
\n
\n
\n
\n
\n \n\n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
<\/td>\n // Learn more about F# at http://fsharp.net<\/td>\n <\/tr>\n
<\/td>\n // See the 'F# Tutorial' project for more help.<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n open System.IO<\/td>\n <\/tr>\n
<\/td>\n open SevenZip<\/td>\n <\/tr>\n
<\/td>\n open System<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n SevenZipCompressor.SetLibraryPath(@"path/to/7z.dll")<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let dir = @"files/path/..."<\/td>\n <\/tr>\n
<\/td>\n let txts = Directory.GetFiles(dir + @"Test")<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let sz = SevenZip.SevenZipCompressor()<\/td>\n <\/tr>\n
<\/td>\n let st = System.Diagnostics.Stopwatch()<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n sz.CompressionMethod <- CompressionMethod.Ppmd<\/td>\n <\/tr>\n
<\/td>\n sz.CompressionLevel <- CompressionLevel.Low<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let compress2 (f:byte[]) = <\/td>\n <\/tr>\n
<\/td>\n use mio = new MemoryStream(f)<\/td>\n <\/tr>\n
<\/td>\n use m2 = new MemoryStream(f.Length * 2) <\/td>\n <\/tr>\n
<\/td>\n sz.CompressStream(mio, m2)<\/td>\n <\/tr>\n
<\/td>\n m2.GetBuffer().[0..int m2.Length - 1]<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n st.Start()<\/td>\n <\/tr>\n
<\/td>\n let compressionMap = txts |> Array.map (fun f -> Path.GetFileNameWithoutExtension f, f |> File.ReadAllBytes |> compress2) |> Map.ofArray<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let compredist f1 f2 n1 n2 =<\/td>\n <\/tr>\n
<\/td>\n let code = compressionMap.[n1]<\/td>\n <\/tr>\n
<\/td>\n let code2 = compressionMap.[n2]<\/td>\n <\/tr>\n
<\/td>\n let fxy = Array.append f1 f2<\/td>\n <\/tr>\n
<\/td>\n let code3 = compress2 (fxy) <\/td>\n <\/tr>\n
<\/td>\n float(code3.Length - (min (code.Length) (code2.Length))) / float(max (code.Length) (code2.Length)) <\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let nearEdges = [| for f in txts -> let n1 = Path.GetFileNameWithoutExtension f <\/td>\n <\/tr>\n
<\/td>\n n1, <\/td>\n <\/tr>\n
<\/td>\n let fbytes = File.ReadAllBytes f <\/td>\n <\/tr>\n
<\/td>\n txts |> Array.map (fun fname -> <\/td>\n <\/tr>\n
<\/td>\n let n2 = Path.GetFileNameWithoutExtension fname<\/td>\n <\/tr>\n
<\/td>\n Path.GetFileNameWithoutExtension fname, compredist fbytes (File.ReadAllBytes(fname)) n1 n2) <\/td>\n <\/tr>\n
<\/td>\n |> Array.sortBy snd|]<\/td>\n <\/tr>\n
<\/td>\n st.Stop() <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let nearEdgesMap = nearEdges |> Map.ofArray<\/td>\n <\/tr>\n
<\/td>\n let pairs = nearEdgesMap |> Map.map (fun _ v -> Map.ofArray v)<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n printfn "%A" nearEdges <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n type 'a Tree =<\/td>\n <\/tr>\n
<\/td>\n | Node of 'a<\/td>\n <\/tr>\n
<\/td>\n | Branch of 'a Tree * 'a Tree<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n type Cluster<'a when 'a : comparison> = <\/td>\n <\/tr>\n
<\/td>\n | Singleton of 'a Set<\/td>\n <\/tr>\n
<\/td>\n | Clusters of 'a Set * 'a Tree<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let completelinkage (ps:Map<'a, Map<'a,float>>) (a: 'a Set) (b:'a Set) = <\/td>\n <\/tr>\n
<\/td>\n a |> Set.map (fun item1 -> b |> Set.map (fun item2 -> ps.[item1].[item2]) <\/td>\n <\/tr>\n
<\/td>\n |> Set.maxElement) //we only want the two largest pair distances) <\/td>\n <\/tr>\n
<\/td>\n |> Set.maxElement<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let distclust ps = function <\/td>\n <\/tr>\n
<\/td>\n | Singleton (item), Clusters(items, _) -> completelinkage ps item items<\/td>\n <\/tr>\n
<\/td>\n | Clusters (items, _) , Singleton(item) -> completelinkage ps items item<\/td>\n <\/tr>\n
<\/td>\n | Clusters (items1, _), Clusters(items2, _) -> completelinkage ps items1 items2<\/td>\n <\/tr>\n
<\/td>\n | Singleton (item1) , Singleton(item2) -> ps.[item1.MaximumElement].[item2.MaximumElement]<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let mergeClusters = function <\/td>\n <\/tr>\n
<\/td>\n | Singleton (item), Clusters(items, dendogram) <\/td>\n <\/tr>\n
<\/td>\n | Clusters (items, dendogram) , Singleton(item) -> Clusters(Set.union item items, Branch(dendogram, Node item.MinimumElement))<\/td>\n <\/tr>\n
<\/td>\n | Clusters (items1, dendogram1), Clusters(items2, dendogram2) -> Clusters(Set.union items1 items2, Branch(dendogram1, dendogram2))<\/td>\n <\/tr>\n
<\/td>\n | Singleton (item1) , Singleton(item2) -> Clusters(Set.union item1 item2, Branch(Node item1.MinimumElement, Node item2.MinimumElement))<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let r = Random()<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n (* <\/td>\n <\/tr>\n
<\/td>\n A function that takes a cluster and a set of clusters and finds the nearest item using cluster dist functions<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n A function that takes a cluster and an item and calculates distance as maxdist (item, clustermember)<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n There is a map that holds every item and its neighbiors<\/td>\n <\/tr>\n
<\/td>\n If we have an item we find the closest item by looking it up in the map.<\/td>\n <\/tr>\n
<\/td>\n But we also need to find the closest in the cluster. So we must compare the item to a cluster<\/td>\n <\/tr>\n
<\/td>\n To do this we for each cluster, compare the distance to our current item<\/td>\n <\/tr>\n
<\/td>\n If an item is closest we add the merged 2 to the cluster stack as a branch and remove the item from actives<\/td>\n <\/tr>\n
<\/td>\n If a cluster is closest we merge the item to the tree, remove it from the cluster stack and add the new tree to the stack<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n If the next item we are looking at is a cluster we must find the closest item.<\/td>\n <\/tr>\n
<\/td>\n To find it in the single set we map each item to its distance from the cluster using dist clust<\/td>\n <\/tr>\n
<\/td>\n We also sort the cluster set by distance from current cluster<\/td>\n <\/tr>\n
<\/td>\n Again if the single item is the closest we merge with cluster and remove from map;<\/td>\n <\/tr>\n
<\/td>\n If the cluster is the closest we remove both clusters from clusterset, merge them and put them back<\/td>\n <\/tr>\n
<\/td>\n Recurse <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n Item , Item -> Pack as a Singleton<\/td>\n <\/tr>\n
<\/td>\n *)<\/td>\n <\/tr>\n
<\/td>\n // (clusterset : Map<string, string Cluster>) <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let asCluster x = Singleton (set [x])<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let closestinActives distances cluster (item : string Set) = <\/td>\n <\/tr>\n
<\/td>\n item |> Set.map (fun s -> distclust distances (asCluster s, cluster), s)<\/td>\n <\/tr>\n
<\/td>\n |> Set.minElement <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let find points first closest (distances : Map<string, Map<string,float>>) = <\/td>\n <\/tr>\n
<\/td>\n let initialActives = points |> Set.ofArray |> Set.remove first <\/td>\n <\/tr>\n
<\/td>\n |> Set.remove closest <\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let rec seek (stack : string Cluster list) (actives : string Set) = <\/td>\n <\/tr>\n
<\/td>\n let current = stack.Head <\/td>\n <\/tr>\n
<\/td>\n if stack.Length = 1 && actives = Set.empty then current<\/td>\n <\/tr>\n
<\/td>\n else <\/td>\n <\/tr>\n
<\/td>\n let nextDist, next = if actives.Count = 0 then Double.MaxValue,"" else closestinActives distances current actives<\/td>\n <\/tr>\n
<\/td>\n if stack.Length = 1 then seek (asCluster next :: stack) (actives.Remove(next))<\/td>\n <\/tr>\n
<\/td>\n else let topofstack = stack.Tail.Head <\/td>\n <\/tr>\n
<\/td>\n let stackDist = distclust distances (topofstack, current) <\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n if nextDist < stackDist then<\/td>\n <\/tr>\n
<\/td>\n seek (asCluster next :: stack) (actives.Remove(next))<\/td>\n <\/tr>\n
<\/td>\n else seek ((mergeClusters (current, topofstack)) :: (stack.Tail.Tail)) actives<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n seek [asCluster closest ; asCluster first] initialActives<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let rec toGraph depth = function<\/td>\n <\/tr>\n
<\/td>\n | Node(x) -> x, "", " node\\r\\n [\\r\\n id\\t\\""+x+"\\"\\r\\n label\\t\\"" + x + "\\"\\r\\n ]\\r\\n"<\/td>\n <\/tr>\n
<\/td>\n | Branch(ltree,rtree) -> let lname, lgraph, names1 = toGraph (depth + 1) ltree<\/td>\n <\/tr>\n
<\/td>\n let rname, rgraph, names2 = toGraph (depth + 1) rtree<\/td>\n <\/tr>\n
<\/td>\n let name = string (r.Next(0, int(2. ** (float depth + 9.))) )<\/td>\n <\/tr>\n
<\/td>\n name, sprintf "%s\\r\\n%s\\r\\n edge\\r\\n [\\r\\n source\\t\\"%s\\"\\r\\n target\\t\\"%s\\"\\r\\n ]\\r\\n edge\\r\\n [\\r\\n source\\t\\"%s\\"\\r\\n target\\t\\"%s\\"\\r\\n ]" <\/td>\n <\/tr>\n
<\/td>\n lgraph rgraph name lname name rname, <\/td>\n <\/tr>\n
<\/td>\n (sprintf " node\\r\\n [\\r\\n id\\t\\"%s\\"\\r\\n label\\t\\"\\"\\r\\n ]\\r\\n" name) + names1 + names2<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let first = fst nearEdges.[r.Next(0,txts.Length)]<\/td>\n <\/tr>\n
<\/td>\n let closest = fst nearEdgesMap.[first].[1]<\/td>\n <\/tr>\n
<\/td>\n let items, fcluster = (function | Clusters(leset, letree) -> leset, letree) (find (nearEdges |> Array.map fst) first closest pairs)<\/td>\n <\/tr>\n
<\/td>\n \n<\/td>\n <\/tr>\n
<\/td>\n let _, outgraph, nodes = toGraph 0 fcluster <\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n
<\/td>\n let n = "graph [" + nodes + outgraph + "]"<\/td>\n <\/tr>\n
<\/td>\n File.WriteAllText("mbook.gml", n)<\/td>\n <\/tr>\n
<\/td>\n <\/td>\n <\/tr>\n<\/table>\n\n\n <\/div>\n\n <\/div>\n \n<\/div>\n\n <\/div>\n